library("tidyverse")
library("plyr")
library("dplyr")
library("ggplot2")
library("scales")
library("ggpubr")
library("gridExtra")
library("grid")
library("GGally")
library("vcfR") # for extracting genotype data from a vcf file
library("data.table")
library("stringr")
library("janitor")
library("plotly") # for the 3d plots
library("ggfortify")
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE,
fig.width = 10,
fig.asp = 0.6,
out.width = "100%")
Extract genotypes for each site and individual. The metadata for all samples can be found in here.
vcf <- read.vcfR("/Users/nuriteliash/Documents/GitHub/varroa-linkage-map/data/vcf_filter/Q40BIALLDP16HDP40mis.5Chr7/Q40BIALLDP16HDP40mis.5Chr7.recode.vcf", verbose = FALSE )
vcf
## ***** Object of Class vcfR *****
## 223 samples
## 7 CHROMs
## 35,169 variants
## Object size: 293.5 Mb
## 0 percent missing data
## ***** ***** *****
# extract the genotype for each site in each individual
gt <- extract.gt(vcf, element = "GT")
gt <- as.data.frame(t(gt)) %>%
rownames_to_column("ID")
#clean the ID names
gt$ID <- sub("_[^_]+$", "", gt$ID)
table <- gt %>%
t() %>%
as.data.frame() %>%
row_to_names(row_number = 1)
read the meta data table and set the family
meta <- read_csv("/Users/nuriteliash/Documents/GitHub/varroa-linkage-map/data/meta_more.csv")
# set the families
family = grep("fnd",gt$ID, value=TRUE) %>%
str_extract("[^_]+") %>%
unique()
we expect all F2 offspring to be homozygotic (0/0) like their parents (F1)
# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/0")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/0")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/0")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
# mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n)
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes
data <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
left_join(meta, by="sample") %>%
rename(homo_ref="0/0") %>%
rename(hetero="0/1") %>%
rename(homo_alt="1/1")
head(data)
## # A tibble: 6 × 10
## # Groups: sample [6]
## sample total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex adult…⁵
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 63_64a_grn 2540 0.794 0.206 3.94e-4 63 Nymph 2 Nymph no adu…
## 2 63_64b_grn 2382 0.788 0.211 8.40e-4 63 Nymph 2 Nymph no adu…
## 3 63_64c_grn 2452 0.799 0.200 8.16e-4 63 Egg 2 Egg no adu…
## 4 63_64d_grn 2432 0.980 0.0201 0 63 Egg 2 Egg no adu…
## 5 600_601a_g… 1163 0.988 0.0120 0 600 Adult 2 male no adu…
## 6 600_601b_g… 1333 0.990 0.00975 0 600 Egg 2 Egg no adu…
## # … with abbreviated variable names ¹homo_ref, ²homo_alt, ³dev_stage,
## # ⁴generation, ⁵adult_sisters
data %>%
plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
color = ~sex,
colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
symbol= ~adult_sisters,
symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
text=~sample,
hoverinfo= "text") %>%
add_markers() %>%
layout(title = 'F2 offspring of F1 male (0/0) x female (0/0)',
scene = list(
xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
left_join(meta, by="sample")
p <- df %>%
filter(total>10) %>%
ggplot(aes(y =freq, x = sex, color = gt,
shape = adult_sisters,
text = paste("sample:", sample,
", n:", total))) +
geom_jitter(width=0.1, size=2) +
scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
theme_classic() +
ggtitle('F2 offspring of F1 male (0/0) x female (0/0)') +
guides(color = "none") +
facet_wrap( ~gt)
ggplotly(p) %>%
layout(legend = list(orientation = "h", x = 0, y = -0.1))
we expect all F2 offspring to be homozygotic (1/1) like their parents (F1)
# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "1/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "1/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "1/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
# mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n)
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes
data <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
mutate(arcsin=asin(sqrt(freq))) %>%
pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
left_join(meta, by="sample") %>%
rename(homo_ref="0/0") %>%
rename(hetero="0/1") %>%
rename(homo_alt="1/1")
head(data)
## # A tibble: 6 × 10
## # Groups: sample [6]
## sample total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex adult…⁵
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 63_64a_grn 1629 1.23e-3 0.178 0.821 63 Nymph 2 Nymph no adu…
## 2 63_64b_grn 1550 6.45e-4 0.188 0.812 63 Nymph 2 Nymph no adu…
## 3 63_64c_grn 1535 6.51e-4 0.180 0.819 63 Egg 2 Egg no adu…
## 4 63_64d_grn 1537 6.51e-4 0.0143 0.985 63 Egg 2 Egg no adu…
## 5 600_601a_g… 643 0 0.0218 0.978 600 Adult 2 male no adu…
## 6 600_601b_g… 778 0 0.00514 0.995 600 Egg 2 Egg no adu…
## # … with abbreviated variable names ¹homo_ref, ²homo_alt, ³dev_stage,
## # ⁴generation, ⁵adult_sisters
data %>% #dplyr::filter(sex == "male") %>%
# filter(!sample =="502_503d_grn") %>%
# filter(!sample =="43_44c_grn") %>%
plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
color = ~sex,
colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
symbol= ~adult_sisters,
symbols= c("yes"="circle", "no"= "circle-open"),
text=~sample,
hoverinfo= "text") %>%
add_markers() %>%
layout(title = 'F2 offspring of F1 male (1/1) x female (1/1)',
scene = list(
xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
left_join(meta, by="sample")
p <- df %>%
filter(total>10) %>%
ggplot(aes(y =freq, x = sex, color = gt,
shape = adult_sisters,
text = paste("sample:", sample,
", n:", total))) +
geom_jitter(width=0.1, size=2) +
scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
theme_classic() +
ggtitle('F2 offspring of F1 male (1/1) x female (1/1)') +
guides(color = "none") +
facet_wrap( ~gt)
ggplotly(p) %>%
layout(legend = list(orientation = "h", x = 0, y = -0.1))
# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/0")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/0")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
# mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n)
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes
# 3d
data <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
left_join(meta, by="sample") %>%
rename(homo_ref="0/0") %>%
rename(hetero="0/1") %>%
rename(homo_alt="1/1")
head(data)
## # A tibble: 6 × 10
## # Groups: sample [6]
## sample total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex adult…⁵
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 63_64a_grn 3511 0.277 0.560 0.163 63 Nymph 2 Nymph no adu…
## 2 63_64b_grn 3230 0.236 0.549 0.215 63 Nymph 2 Nymph no adu…
## 3 63_64c_grn 3248 0.248 0.579 0.173 63 Egg 2 Egg no adu…
## 4 63_64d_grn 3259 0.0172 0.977 0.00583 63 Egg 2 Egg no adu…
## 5 600_601a_gr… 728 0.0110 0.988 0.00137 600 Adult 2 male no adu…
## 6 600_601b_grn 861 0.00929 0.990 0.00116 600 Egg 2 Egg no adu…
## # … with abbreviated variable names ¹homo_ref, ²homo_alt, ³dev_stage,
## # ⁴generation, ⁵adult_sisters
data %>% #dplyr::filter(sex == "male") %>%
# filter(!sample =="502_503d_grn") %>%
# filter(!sample =="43_44c_grn") %>%
plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
color = ~sex,
colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
symbol= ~adult_sisters,
symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
text=~sample,
hoverinfo= "text") %>%
add_markers() %>%
layout(title = 'F2 offspring of F1 male (0/0) x female (0/1)',
scene = list(
xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
left_join(meta, by="sample")
p <- df %>%
filter(total>10) %>%
ggplot(aes(y =freq, x = sex, color = gt,
shape = adult_sisters,
text = paste("sample:", sample,
", n:", total))) +
geom_jitter(width=0.1, size=2) +
scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
theme_classic() +
ggtitle('F2 offspring of F1 male (0/0) x female (0/1)') +
guides(color = "none") +
facet_wrap( ~gt)
ggplotly(p) %>%
layout(legend = list(orientation = "h", x = 0, y = -0.1))
# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "1/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "1/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
# mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n)
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes
data <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
left_join(meta, by="sample") %>%
rename(homo_ref="0/0") %>%
rename(hetero="0/1") %>%
rename(homo_alt="1/1")
head(data)
## # A tibble: 6 × 10
## # Groups: sample [6]
## sample total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex adult…⁵
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 63_64a_grn 2383 0.207 0.595 0.198 63 Nymph 2 Nymph no adu…
## 2 63_64b_grn 2222 0.234 0.482 0.284 63 Nymph 2 Nymph no adu…
## 3 63_64c_grn 2256 0.218 0.590 0.193 63 Egg 2 Egg no adu…
## 4 63_64d_grn 2271 0.00705 0.985 0.00749 63 Egg 2 Egg no adu…
## 5 600_601a_gr… 624 0.00321 0.997 0 600 Adult 2 male no adu…
## 6 600_601b_grn 758 0 0.996 0.00396 600 Egg 2 Egg no adu…
## # … with abbreviated variable names ¹homo_ref, ²homo_alt, ³dev_stage,
## # ⁴generation, ⁵adult_sisters
data %>%
plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
color = ~sex,
colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
symbol= ~adult_sisters,
symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
text=~sample,
hoverinfo= "text") %>%
add_markers() %>%
layout(title = 'F2 offspring of F1 male (1/1) x female (0/1)',
scene = list(
xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
left_join(meta, by="sample")
p <- df %>%
filter(total>10) %>%
ggplot(aes(y =freq, x = sex, color = gt,
shape = adult_sisters,
text = paste("sample:", sample,
", n:", total))) +
geom_jitter(width=0.1, size=2) +
scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
theme_classic() +
ggtitle('F2 offspring of F1 male (1/1) x female (0/1)') +
guides(color = "none") +
facet_wrap( ~gt)
ggplotly(p) %>%
layout(legend = list(orientation = "h", x = 0, y = -0.1))
# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/0")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
# mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n)
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes
data <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
left_join(meta, by="sample") %>%
rename(homo_ref="0/0") %>%
rename(hetero="0/1") %>%
rename(homo_alt="1/1")
head(data)
## # A tibble: 6 × 10
## # Groups: sample [6]
## sample total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex adult…⁵
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 63_64a_grn 49 0.531 0.449 0.0204 63 Nymph 2 Nymph no adu…
## 2 63_64b_grn 42 0.5 0.5 0 63 Nymph 2 Nymph no adu…
## 3 63_64c_grn 44 0.432 0.568 0 63 Egg 2 Egg no adu…
## 4 63_64d_grn 47 0.681 0.298 0.0213 63 Egg 2 Egg no adu…
## 5 600_601a_gr… 165 0.952 0.0485 0 600 Adult 2 male no adu…
## 6 600_601b_grn 179 0.944 0.0559 0 600 Egg 2 Egg no adu…
## # … with abbreviated variable names ¹homo_ref, ²homo_alt, ³dev_stage,
## # ⁴generation, ⁵adult_sisters
data %>%
plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
color = ~sex,
colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
symbol= ~adult_sisters,
symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
text=~sample,
hoverinfo= "text") %>%
add_markers() %>%
layout(title = 'F2 offspring of F1 male (0/1) x female (0/0)',
scene = list(
xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
left_join(meta, by="sample")
p <- df %>%
filter(total>10) %>%
ggplot(aes(y =freq, x = sex, color = gt,
shape = adult_sisters,
text = paste("sample:", sample,
", n:", total))) +
geom_jitter(width=0.1, size=2) +
scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
theme_classic() +
ggtitle('F2 offspring of F1 male (1/1) x female (0/1)') +
guides(color = "none") +
facet_wrap( ~gt)
ggplotly(p) %>%
layout(legend = list(orientation = "h", x = 0, y = -0.1))
# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "1/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
# mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n)
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes
data <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
left_join(meta, by="sample") %>%
rename(homo_ref="0/0") %>%
rename(hetero="0/1") %>%
rename(homo_alt="1/1")
head(data)
## # A tibble: 6 × 10
## # Groups: sample [6]
## sample total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex adult…⁵
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 63_64a_grn 3 0 0.333 0.667 63 Nymph 2 Nymph no adu…
## 2 63_64b_grn 3 0 0.333 0.667 63 Nymph 2 Nymph no adu…
## 3 63_64c_grn 3 0 0 1 63 Egg 2 Egg no adu…
## 4 63_64d_grn 2 0 0.5 0.5 63 Egg 2 Egg no adu…
## 5 600_601a_gr… 129 0 0.0155 0.984 600 Adult 2 male no adu…
## 6 600_601b_grn 160 0 0.0188 0.981 600 Egg 2 Egg no adu…
## # … with abbreviated variable names ¹homo_ref, ²homo_alt, ³dev_stage,
## # ⁴generation, ⁵adult_sisters
data %>%
plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
color = ~sex,
colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
symbol= ~adult_sisters,
symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
text=~sample,
hoverinfo= "text") %>%
add_markers() %>%
layout(title = 'F2 offspring of F1 male (0/1) x female (1/1)',
scene = list(
xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
left_join(meta, by="sample")
p <- df %>%
filter(total>10) %>%
ggplot(aes(y =freq, x = sex, color = gt,
shape = adult_sisters,
text = paste("sample:", sample,
", n:", total))) +
geom_jitter(width=0.1, size=2) +
scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
theme_classic() +
ggtitle('F2 offspring of F1 male (0/1) x female (1/1)') +
guides(color = "none") +
facet_wrap( ~gt)
ggplotly(p) %>%
layout(legend = list(orientation = "h", x = 0, y = -0.1))
# Prepare table with observed and expected counts, per family:
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_fnd")), all_vars(. == "0/1")) %>% # force F0 female to be homo, like her son
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
# mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n)
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs) %>% mutate(sample = as.character(sample))
# replace "NA" with zero value when gt doesn't exists
samples <- data.frame(sample = rep(unique(observed$sample), each =3), gt = rep(c("0/0", "0/1", "1/1"))) # add all possible genotypes
data <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
pivot_wider(names_from="gt", values_from=freq, id_cols=c(sample,total)) %>%
left_join(meta, by="sample") %>%
rename(homo_ref="0/0") %>%
rename(hetero="0/1") %>%
rename(homo_alt="1/1")
head(data)
## # A tibble: 6 × 10
## # Groups: sample [6]
## sample total homo_…¹ hetero homo_…² family dev_s…³ gener…⁴ sex adult…⁵
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 63_64a_grn 62 0.258 0.710 0.0323 63 Nymph 2 Nymph no adu…
## 2 63_64b_grn 65 0.323 0.677 0 63 Nymph 2 Nymph no adu…
## 3 63_64c_grn 61 0.262 0.705 0.0328 63 Egg 2 Egg no adu…
## 4 63_64d_grn 65 0.308 0.677 0.0154 63 Egg 2 Egg no adu…
## 5 600_601a_gr… 459 0 0.993 0.00654 600 Adult 2 male no adu…
## 6 600_601b_grn 560 0.0125 0.980 0.00714 600 Egg 2 Egg no adu…
## # … with abbreviated variable names ¹homo_ref, ²homo_alt, ³dev_stage,
## # ⁴generation, ⁵adult_sisters
data %>%
plot_ly(x= ~homo_ref, y =~hetero, z = ~homo_alt,
color = ~sex,
colors = c("Egg"="#FFD301","Nymph"="#9BA4B5","female"="#EB455F","male"="#1982c4"),
symbol= ~adult_sisters,
symbols= c("with adult sister"="circle", "no adult sister"= "circle-open"),
text=~sample,
hoverinfo= "text") %>%
add_markers() %>%
layout(title = 'F2 offspring of F1 male (0/1) x female (0/1)',
scene = list(
xaxis = list(title = '0/0', nticks = 3, range = c(0,1)),
yaxis = list(title = '0/1', nticks = 3, range = c(0,1)),
zaxis = list(title = '1/1', nticks = 3, range = c(0,1))))
# 2d
df <- left_join(samples, observed, by=c("sample","gt")) %>%
replace(is.na(.),0) %>% # replace "NA" with zero value when gt doesn't exists
group_by(sample) %>%
mutate(total = as.numeric(sum(obs))) %>%
mutate(freq=obs/total) %>%
left_join(meta, by="sample")
p <- df %>%
filter(total>10) %>%
ggplot(aes(y =freq, x = sex, color = gt,
shape = adult_sisters,
text = paste("sample:", sample,
", n:", total))) +
geom_jitter(width=0.1, size=2) +
scale_shape_manual(values=c(1,19)) +
scale_color_manual(values=c("#ffbf00", "#66b032","#1982c4")) +
theme_classic() +
ggtitle('F2 offspring of F1 male (0/1) x female (0/1)') +
guides(color = "none") +
facet_wrap( ~gt)
ggplotly(p) %>%
layout(legend = list(orientation = "h", x = 0, y = -0.1))